# Clean and merge data
# Attitudes toward executive aggrandizement
# Presidential winners coded in "lapop_winners.csv"

# SETUP --------------------------------


## Packages -------

library(tidyverse)
library(readstata13)
library(foreign)
library(haven)
library(countrycode)
library(reshape2)
library(readxl)
library(wbstats)
library(foreign)
library(conflicted)
library(psych)
library(here)

# Called for its printed value: shows the project root that the here() paths
# below are resolved against
here()

# resolve package conflicts
# Pin which package supplies each name that more than one attached package
# exports
conflict_prefer("select", "dplyr")
conflict_prefer("filter", "dplyr")
conflict_prefer("alpha", "psych")

## Load in LAPOP Data ------------------


# Restores the objects stored in the .RData file; the rest of the script
# expects this to include a data frame named lapop.slim
load(here("data/lapop_slim.RData"))

# Country names
# One row per distinct (country, iso3c, pais) combination in the survey data
countrynames <- unique.data.frame(lapop.slim[, c("country", "iso3c", "pais")])

# Vector of the ISO3 codes from the lookup table above
iso3.vec <- as.vector(countrynames$iso3c)


# CODE WINNERS AND NON_VOTERS  -----------------------------------

## A bit of cleaning and import --------------------------------

# Country-level winner codes, read from the CSV file
# (the literal string "NA" is treated as missing)
country.winners <- here("data/lapop_winners.csv") %>%
  read_csv(na = "NA")

# Vote choice (one column per wave) and turnout (vb2):
# first keep a factor copy of each column under "<name>_fac",
# then overwrite the original column with its numeric codes
lapop.slim <- lapop.slim %>%
  mutate(
    across(
      c(vb3_10, vb3_12, vb3n_14, vb3n_16, vb3n_18),
      as_factor,
      .names = "{.col}_fac"
    ),
    across(c(vb3_10, vb3_12, vb3n_14, vb3n_16, vb3n_18), as.numeric),
    vb2_fac = as_factor(vb2),
    vb2 = as.numeric(vb2)
  )


### Function to pull vb3 (vote choice) winner codes by wave -----------------
# Return the non-missing `vb3code` values that `country.winners` lists for
# survey wave `x`.
#   x: a single wave identifier, compared against `country.winners$wave`
# The result is returned invisibly (as the original trailing assignment did),
# so a bare call at the console prints nothing.
pullr <- function(x) {
  # which() drops rows whose wave is NA, as subset() would
  in_wave <- which(country.winners$wave == x)
  codes <- as.vector(country.winners$vb3code[in_wave])
  invisible(na.omit(codes))
}

## Code non-voters ---------------------------
# non_voter starts as NA and is filled in rule by rule; each replace()
# overwrites the result of the previous one, so later rules take precedence.
# which() skips rows where the condition is NA.
lapop.slim <- lapop.slim %>%
  mutate(
    non_voter = NA,
    non_voter = replace(non_voter, which(vb2 == 1), 0), # Voted
    non_voter = replace(non_voter, which(vb2 == 2), 1), # Did not vote
    non_voter = replace(non_voter, which(vb3_10 == 0), 1), # 2010 blank
    non_voter = replace(non_voter, which(vb3_12 == 0), 1), # 2012 blank vote
    non_voter = replace(non_voter, which(vb3n_14 %in% c(0, 97)), 1), # 2014 blank or null
    non_voter = replace(non_voter, which(vb3n_16 %in% c(0, 97)), 1), # 2016 blank or null
    non_voter = replace(non_voter, which(vb3n_18 %in% c(0, 97)), 1) # 2018 blank or null
  )

# Printed cross-check of the recode against the turnout factor
lapop.slim %>%
  count(non_voter, vb2_fac)

## Code winners ----------------------------------
# winner starts as NA; non-voters are set to 0. Then, wave by wave, every
# respondent with a recorded vote choice is set to 0 and those whose choice
# is among that wave's winner codes (pullr) are set to 1.
lapop.slim <- lapop.slim %>%
  mutate(
    winner = NA,
    winner = replace(winner, which(non_voter == 1), 0),
    # 2010
    winner = replace(winner, which(wave == 2010 & !is.na(vb3_10)), 0),
    winner = replace(winner, which(wave == 2010 & vb3_10 %in% pullr(2010)), 1),
    # 2012
    winner = replace(winner, which(wave == 2012 & !is.na(vb3_12)), 0),
    winner = replace(winner, which(wave == 2012 & vb3_12 %in% pullr(2012)), 1),
    # 2014
    winner = replace(winner, which(wave == 2014 & !is.na(vb3n_14)), 0),
    winner = replace(winner, which(wave == 2014 & vb3n_14 %in% pullr(2014)), 1),
    # 2016
    winner = replace(winner, which(wave == 2016 & !is.na(vb3n_16)), 0),
    winner = replace(winner, which(wave == 2016 & vb3n_16 %in% pullr(2016)), 1),
    # 2018
    winner = replace(winner, which(wave == 2018 & !is.na(vb3n_18)), 0),
    winner = replace(winner, which(wave == 2018 & vb3n_18 %in% pullr(2018)), 1)
  )


## Factor Variable, winner, loser, non-voter
# The three conditions are mutually exclusive; rows matching none stay NA
lapop.slim <- lapop.slim %>%
  mutate(
    pres_support = case_when(
      winner == 0 & non_voter == 0 ~ "loser",
      winner == 1 & non_voter == 0 ~ "winner",
      winner == 0 & non_voter == 1 ~ "non-voter"
    ),
    pres_support = factor(pres_support, levels = c("loser", "non-voter", "winner"))
  )

# Printed cross-check of the three codings against each other
lapop.slim %>%
  count(winner, non_voter, pres_support) %>%
  na.omit()


# CODE PARTISAN WINNERS AND NO PID ---------------------------

## A bit of cleaning ---------------

# Party-identification items: vb10 and the wave-specific vb11 columns.
# For each, keep a factor copy under "<name>_fac", then overwrite the
# original column with its numeric codes.
lapop.slim <- lapop.slim %>%
  mutate(
    vb10_fac = as_factor(vb10),
    vb10 = as.numeric(vb10),
    across(
      c(vb11_10, vb11_12, vb11_14, vb11_16, vb11_18),
      as_factor,
      .names = "{.col}_fac"
    ),
    across(c(vb11_10, vb11_12, vb11_14, vb11_16, vb11_18), as.numeric)
  )

### Function to pull pid winners ---------------
# Return the non-missing `vb11code` values that `country.winners` lists for
# survey wave `x`.
#   x: a single wave identifier, compared against `country.winners$wave`
# The result is returned invisibly (as the original trailing assignment did),
# so a bare call at the console prints nothing.
pullr_id <- function(x) {
  # which() drops rows whose wave is NA, as subset() would
  in_wave <- which(country.winners$wave == x)
  codes <- as.vector(country.winners$vb11code[in_wave])
  invisible(na.omit(codes))
}

## Code Non-Partisans ----------------------
# non_partisan is 1 where vb10 == 2, 0 where vb10 == 1, and NA otherwise
lapop.slim$non_partisan <- NA
lapop.slim$non_partisan[lapop.slim$vb10 == 2] <- 1
lapop.slim$non_partisan[lapop.slim$vb10 == 1] <- 0

## Code Partisan Winners -----------------
lapop.slim$pid_winner <- NA
lapop.slim$pid_winner[lapop.slim$non_partisan == 1] <- 0 # non-partisans are zero

# Same two steps for every wave: respondents with a recorded party ID are set
# to 0, then those whose party is among THAT wave's winning codes are set to 1.
# Looping keeps the wave filter, the vb11_<yy> column and the pullr_id() lookup
# on one variable; the unrolled version looked up 2016 codes for the 2018 wave.
for (wv in c(2010, 2012, 2014, 2016, 2018)) {
  # Party-ID column for this wave, e.g. vb11_10 for 2010
  pid <- lapop.slim[[sprintf("vb11_%02d", wv %% 100)]]
  in_wave <- lapop.slim$wave == wv

  lapop.slim$pid_winner[in_wave & !is.na(pid)] <- 0
  lapop.slim$pid_winner[in_wave & pid %in% pullr_id(wv)] <- 1
}

# Drop the loop temporaries so they do not linger in the workspace
rm(wv, pid, in_wave)


# COUNTRY-LEVEL VARIABLES --------------------------------------

# COUNTRY VARS FROM WINNERS LAPOP FULL -----------------------

# Other country-wave variables carried in the winners file ("lapop_winners.csv")
country.vars <- select(country.winners, iso3c, wave, leader, pop.speech)

# Merge with LAPOP data. No `by` is given, so left_join() matches on every
# column name the two tables have in common.
lapop.slim <- lapop.slim %>%
  left_join(country.vars)

## Mass Polarization ---------------------

# Left-right self-placement (l1) as a plain numeric
lapop.slim <- lapop.slim %>%
  mutate(lrscale = as.numeric(l1))

# Per country-year: share of respondents at 3 or below ("left") and at 8 or
# above ("right") among rows with no missing values, then their product
# divided by .25. The two shares cannot sum to more than 1, so the product is
# at most .25 and mass.polr is at most 1.
masspol <- lapop.slim %>%
  select(iso3c, year, lrscale) %>%
  mutate(
    left = ifelse(lrscale <= 3, 1, 0),
    right = ifelse(lrscale >= 8, 1, 0)
  ) %>%
  na.omit() %>%
  group_by(iso3c, year) %>%
  summarize(left = mean(left), right = mean(right)) %>%
  mutate(mass.polr = ((left * right) / .25))

# Attach to the survey rows (matched on the shared column names), then
# discard the helper table
lapop.slim <- lapop.slim %>%
  left_join(masspol)

rm(masspol)



## V-Dem Data ------------------------------------------

# Read the V-Dem country-year file and keep only the indicators used later,
# renaming the country code to iso3c (and a few indicators to shorter names)
# so the table lines up with the survey data
vdem10 <- here("./data/V-Dem-CY-Full+Others-v10.rds") %>%
  readRDS() %>%
  select(
    iso3c = country_text_id,
    year,
    v2x_polyarchy,
    v2xel_frefair,
    respect_con = v2exrescon,
    v2x_libdem,
    v2xlg_legcon,
    v2x_jucon,
    v2x_corr,
    vdem.polar = v2cacamps,
    person.legit = v2exl_legitlead,
    perf.legit = v2exl_legitperf,
    party.links = v2psprlnks,
    v2xps_party,
    v2smpolsoc,
    v2psnatpar,
    v2psnatpar_ord,
    v2x_divparctrl
  )

# Attach to the survey rows (matched on the shared column names), then
# discard the helper table
lapop.slim <- lapop.slim %>%
  left_join(vdem10)

rm(vdem10)


## Database of Political Institutions ----------------------

dpi <- here("data/DPI2020_stata13.dta") %>%
  read_stata() %>%
  mutate(
    # Spell out the abbreviated name before converting names to ISO3 codes
    countryname = replace(
      countryname,
      which(countryname == "Dom. Rep."),
      "Dominican Republic"
    ),
    iso3c = countrycode(
      countryname,
      origin = "country.name",
      destination = "iso3c"
    )
  ) %>%
  select(iso3c, year, maj, totalseats, numgov, yrsoffc, percent1, execrlc, polariz) %>%
  # Maj is missing for Argentina, so it is recalculated from the seat counts;
  # note the recalculation overwrites maj for every country, not only Argentina.
  # NOTE(review): if numgov/totalseats carry numeric missing-value codes those
  # would flow into the ratio -- confirm against the DPI codebook.
  mutate(maj = numgov / totalseats)

# Attach to the survey rows (matched on the shared column names), then
# discard the helper table
lapop.slim <- lapop.slim %>%
  left_join(dpi)

rm(dpi)


# CODING SURVEY DATA ----------------------------------------

# Close congress and Court
# Numeric codes first, then map code 2 to 0 (all other values are left as is)
lapop.slim <- lapop.slim %>%
  mutate(across(c(jc15a, jc16a), as.numeric)) %>%
  mutate(
    close.cong = recode(jc15a, `2` = 0),
    close.court = recode(jc16a, `2` = 0)
  )


# Individual-Level Populism
# Three pop1* items shifted down by one, plus their simple average (pop_ind)
lapop.slim <- lapop.slim %>%
  mutate(
    across(starts_with("pop1"), as.numeric),
    limit_oppo = pop101 - 1,
    govern_direct = pop107 - 1,
    min_threat = pop113 - 1,
    pop_ind = (limit_oppo + govern_direct + min_threat) / 3
  )

# Printed reliability check (psych::alpha) on the three items,
# complete cases only
lapop.slim %>%
  select(limit_oppo, govern_direct, min_threat) %>%
  na.omit() %>%
  alpha()


# Wealth
# Diagnostic table: number of missing values in each r1:r26 item,
# by country and wave
missing.wealth.vars <- lapop.slim %>%
  select(country, wave, r1:r26) %>%
  group_by(country, wave) %>%
  summarise(across(r1:r26, function(item) sum(is.na(item))))

# Items that enter the wealth index
wealth.vars <- c(
  "r1", "r3", "r4", "r4a", "r5", "r6",
  "r7", "r8", "r12", "r14", "r16"
)

# Drop missings: respondents lacking any of these items are removed from
# lapop.slim itself, not just from the PCA input
lapop.slim <- lapop.slim %>%
  drop_na(any_of(wealth.vars))

# Conduct PCA on the remaining rows
# NOTE(review): princomp() is called with its defaults here -- confirm that is
# the intended scaling, and check the sign of the first component downstream.
wealth.pca <- lapop.slim %>%
  select(all_of(wealth.vars)) %>%
  princomp()

# Rescale 0 - 1

# Linearly map `x` so that its smallest non-missing value becomes 0 and its
# largest becomes 1. Missing values stay missing; if all non-missing values
# are equal the division is 0/0 and the result is NaN.
rescale01 <- function(x) {
  lo <- min(x, na.rm = TRUE)
  hi <- max(x, na.rm = TRUE)
  (x - lo) / (hi - lo)
}

# Keep only the component scores, and add the first component rescaled to 0-1
wealth.pca <- data.frame(wealth.pca$scores)
wealth.pca[["wealth"]] <- rescale01(wealth.pca[["Comp.1"]])
wealth <- wealth.pca[["wealth"]]

# Bind to the data. cbind() pairs rows by position, so this relies on the
# scores having been computed from lapop.slim's current rows, in order.
lapop.slim <- cbind(lapop.slim, wealth)


# Individual-level recodes, built in one pass (later entries may use
# earlier ones)
lapop.slim <- lapop.slim %>%
  mutate(
    # Rural: ur code 2 becomes 1, code 1 becomes 0
    rural = recode(as.numeric(ur), `2` = 1, `1` = 0),

    # Dem best form of gov, shifted down by one
    dem.best = as.numeric(ing4 - 1),

    # Satisfaction with democracy, reversed (4 minus the original code)
    sat_dem = 4 - as.numeric(pn4),

    # Education, Female (q1 code 2 becomes 1, code 1 becomes 0)
    edu = as.numeric(ed),
    female = recode(as.numeric(q1), `2` = 1, `1` = 0),

    # Economic Eval: the two items reversed (3 minus the original code),
    # and their average
    econ.soc = 3 - as.numeric(soct2),
    econ.ego = 3 - as.numeric(idio2),
    econ.eval = (econ.ego + econ.soc) / 2,

    # Interpersonal trust
    # 0 = Untrustworthy, 3 = Very Trustworthy
    trust.people = 4 - as.numeric(it1),

    # Neighborhood Safety
    # 0 = Very Unsafe, 3 = Very Safe
    neigh.safe = 4 - as.numeric(aoj11)
  )


# Identifier columns as factors, plus combined identifiers
lapop.slim <- lapop.slim %>%
  mutate(
    across(c(country, wave, iso3c), as.factor),
    # Country Wave Factor, e.g. "<iso3c>.<wave>"
    cntry.wave = factor(paste(iso3c, wave, sep = ".")),
    # Country Year Factor, e.g. "<country>.<year>"
    cntry.year = factor(paste(country, year, sep = ".")),
    # Year as a factor (the original year column is kept as is)
    year.fac = as_factor(year)
  )


# VARIABLE MEANS BY COUNTRY ---------------------------------------

# Mean of `x` with missing values removed first.
# Returns NaN when nothing is left after dropping the missing values.
mean.na <- function(x) {
  mean(x, na.rm = TRUE)
}

# Country-year mean of support for democracy (missing responses ignored).
# .groups = "drop" returns an ungrouped table, so nothing downstream inherits
# a leftover grouping.
country.means.tj <- lapop.slim %>%
  group_by(iso3c, year) %>%
  summarise(dem.best.tj = mean.na(dem.best), .groups = "drop")

# Append the country year means to lapop slim.
# The keys are spelled out so the match does not depend on whichever column
# names the two tables happen to share.
lapop.slim <- left_join(lapop.slim, country.means.tj, by = c("iso3c", "year"))


# SELECT AND SAVE ----------------------------------------------

# Keep identifiers, survey-design variables and the recoded analysis variables
lapop.cleaned <- lapop.slim %>%
  select(
    country, cntry.wave, year, cntry.year, year.fac, wave, iso3c, upm, strata,
    estratopri, wt, weight1500, close.cong, close.court, winner, non_voter, pid_winner,
    vb3n_18, non_partisan, trust.people, wealth, neigh.safe, rural, dem.best, dem.best.tj,
    pop_ind, sat_dem, edu, female, econ.eval, polariz, v2x_libdem, vdem.polar,
    mass.polr, v2x_corr, pop.speech, maj
  )


# Slim data, with only recodes.
# Written through here() like every input above, so the file lands in the
# project's data folder regardless of the current working directory.
save(lapop.cleaned, file = here("data/lapop_10_18.Rdata"))

